{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Logistic Regression Classifier\n",
    "#Importing different libraries\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import tensorflow as tf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   PassengerId  Survived  Pclass  \\\n",
      "0            1         0       3   \n",
      "1            2         1       1   \n",
      "2            3         1       3   \n",
      "3            4         1       1   \n",
      "\n",
      "                                                Name     Sex   Age  SibSp  \\\n",
      "0                            Braund, Mr. Owen Harris    male  22.0      1   \n",
      "1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   \n",
      "2                             Heikkinen, Miss. Laina  female  26.0      0   \n",
      "3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   \n",
      "\n",
      "   Parch            Ticket     Fare Cabin Embarked  \n",
      "0      0         A/5 21171   7.2500   NaN        S  \n",
      "1      0          PC 17599  71.2833   C85        C  \n",
      "2      0  STON/O2. 3101282   7.9250   NaN        S  \n",
      "3      0            113803  53.1000  C123        S  \n"
     ]
    }
   ],
   "source": [
    "# Load the training CSV into a DataFrame and preview its first four rows\n",
    "data = pd.read_csv('train.csv')\n",
    "print(data.head(4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   Pclass                                               Name     Sex   Age  \\\n",
      "0       3                            Braund, Mr. Owen Harris    male  22.0   \n",
      "1       1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0   \n",
      "\n",
      "   SibSp  Parch     Ticket     Fare Cabin Embarked  \n",
      "0      1      0  A/5 21171   7.2500   NaN        S  \n",
      "1      1      0   PC 17599  71.2833   C85        C  \n"
     ]
    }
   ],
   "source": [
    "# Feature columns: everything except the passenger id and the Survived label.\n",
    "# Selecting by name rather than by position keeps this correct even if the\n",
    "# column order of the CSV changes.\n",
    "data_features = data.drop(['PassengerId', 'Survived'], axis=1)\n",
    "print(data_features.head(2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    0\n",
      "1    1\n",
      "Name: Survived, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# The label to predict is the Survived column\n",
    "data_label = data.loc[:, 'Survived']\n",
    "print(data_label.head(2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preprocessing data\n",
    "# Forward-fill missing values down each column. DataFrame.ffill() replaces\n",
    "# fillna(method='ffill'), which is deprecated in recent pandas releases.\n",
    "# A column whose very first value is missing keeps that leading NaN.\n",
    "data_features = data_features.ffill(axis=0)\n",
    "\n",
    "# Represent Sex with numbers\n",
    "gender = {'male': 1, 'female': 2}\n",
    "data_features['Sex'] = data_features['Sex'].map(gender)\n",
    "# map() yields NaN for any value missing from `gender` (for example when this\n",
    "# cell is run a second time), so fail loudly instead of training on NaN.\n",
    "assert not data_features['Sex'].isnull().any(), 'unexpected value in Sex column'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rescale the Fare column with scikit-learn's MinMaxScaler\n",
    "from sklearn import preprocessing\n",
    "\n",
    "min_max_scaler = preprocessing.MinMaxScaler()\n",
    "# Double brackets keep Fare as a 2-D single-column array for the scaler\n",
    "fare = data_features[['Fare']].values.astype(float)\n",
    "fare_scaled = min_max_scaler.fit_transform(fare)\n",
    "data_features['Fare'] = fare_scaled\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rescale the Age column with a MinMaxScaler\n",
    "from sklearn import preprocessing\n",
    "\n",
    "# Rebinds min_max_scaler to a fresh scaler that is fitted on Age only\n",
    "min_max_scaler = preprocessing.MinMaxScaler()\n",
    "age = data_features[['Age']].values.astype(float)\n",
    "age_scaled = min_max_scaler.fit_transform(age)\n",
    "data_features['Age'] = age_scaled\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove the Name and Ticket columns from the feature frame\n",
    "data_features = data_features.drop(['Name', 'Ticket'], axis='columns')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preview the processed features. head() keeps the output short instead of\n",
    "# dumping every row of the frame into the notebook.\n",
    "print(data_features.head(10))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Encoding the Embarked column as integers\n",
    "embarked = {'S': 1, 'C': 2, 'Q': 3}\n",
    "data_features['Embarked'] = data_features['Embarked'].map(embarked)\n",
    "# map() yields NaN for any value missing from `embarked` (including a missing\n",
    "# port or a second run of this cell), so fail loudly instead of training on NaN.\n",
    "assert not data_features['Embarked'].isnull().any(), 'unexpected value in Embarked column'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove the Cabin column from the feature frame\n",
    "data_features = data_features.drop(['Cabin'], axis='columns')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Segmenting train and test data (80% / 20%)\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# A fixed random_state makes the split, and every metric computed from it,\n",
    "# reproducible across kernel restarts.\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    data_features, data_label, test_size=0.2, random_state=42)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(712,)\n"
     ]
    }
   ],
   "source": [
    "# Sanity check: shape of the training labels\n",
    "print(y_train.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "#One Hot Encoding of Data (A part of Data preprocessing) #This is not used at the moment\n",
    "from sklearn.preprocessing import OneHotEncoder\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "891 7\n"
     ]
    }
   ],
   "source": [
    "# Model inputs and parameters; n is the number of feature columns\n",
    "m, n = data_features.shape\n",
    "print(m, n)\n",
    "\n",
    "X = tf.placeholder(dtype=tf.float32, shape=[None, n])  # feature matrix\n",
    "Y = tf.placeholder(dtype=tf.float32, shape=[None, 1])  # labels, one column\n",
    "W = tf.Variable(tf.zeros(shape=[n, 1]))  # weights, initialised to zero\n",
    "b = tf.Variable(tf.zeros(shape=[1]))  # bias, initialised to zero"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hypothesis function: linear logits (including the bias term b) passed\n",
    "# through a sigmoid to give one probability per row.\n",
    "logits = tf.matmul(X, W) + b\n",
    "Y_hat = tf.nn.sigmoid(logits)\n",
    "\n",
    "# Sigmoid cross-entropy cost, averaged to a scalar.\n",
    "# The op applies the sigmoid internally, so it must receive the raw logits;\n",
    "# passing Y_hat would apply the sigmoid twice.\n",
    "cost = tf.reduce_mean(\n",
    "    tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=Y))\n",
    "\n",
    "# Gradient descent optimizer\n",
    "optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)\n",
    "\n",
    "# Global variables initializer. The function has to be called so that `init`\n",
    "# is an op sess.run() accepts rather than the function object itself.\n",
    "init = tf.global_variables_initializer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Training process: full-batch gradient descent on the training split\n",
    "\n",
    "N_EPOCHS = 1000  # number of full passes over the training set\n",
    "LOG_EVERY = 100  # print progress every LOG_EVERY epochs\n",
    "\n",
    "# Feed float32 arrays; the labels are reshaped into a single column so they\n",
    "# match the [None, 1] shape of the Y placeholder.\n",
    "train_feed = {X: X_train.values.astype(np.float32),\n",
    "              Y: y_train.values.reshape(-1, 1).astype(np.float32)}\n",
    "test_feed = {X: X_test.values.astype(np.float32),\n",
    "             Y: y_test.values.reshape(-1, 1).astype(np.float32)}\n",
    "\n",
    "# Build the accuracy op once, before the loop, so the graph does not grow on\n",
    "# every epoch. Y_hat holds one probability per row, so it is thresholded at\n",
    "# 0.5; argmax over a single column would always return 0.\n",
    "predicted_class = tf.cast(tf.greater(Y_hat, 0.5), tf.float32)\n",
    "accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted_class, Y), tf.float32))\n",
    "\n",
    "# Lists for storing the cost and accuracy after every epoch\n",
    "cost_history, accuracy_history = [], []\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    # Create and run the initializer op here so every variable is initialised\n",
    "    sess.run(tf.global_variables_initializer())\n",
    "\n",
    "    for epoch in range(1, N_EPOCHS + 1):\n",
    "        # One gradient descent step on the whole training set\n",
    "        sess.run(optimizer, feed_dict=train_feed)\n",
    "\n",
    "        # Cost and accuracy after the step; np.mean gives a single number\n",
    "        # whether `cost` is a scalar or holds one value per example.\n",
    "        c, acc = sess.run([cost, accuracy], feed_dict=train_feed)\n",
    "        cost_history.append(float(np.mean(c)))\n",
    "        accuracy_history.append(float(acc))\n",
    "\n",
    "        # Displaying the result of the current epoch\n",
    "        if epoch % LOG_EVERY == 0:\n",
    "            print(\"Epoch\", epoch, \"Cost:\", cost_history[-1], \"Accuracy:\", accuracy_history[-1])\n",
    "\n",
    "    # Learned parameters, read once after training has finished\n",
    "    Weight, Bias = sess.run([W, b])\n",
    "\n",
    "    # Final accuracy on the held-out test split\n",
    "    test_accuracy = sess.run(accuracy, feed_dict=test_feed)\n",
    "    print(\"Test accuracy:\", test_accuracy)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
